#  wordspace::dist.matrix
#  quanteda::textstat_dist
# package_Imports_count <- data_package %>%   
#   unnest_tokens(word,Imports) %>%
#   filter(grepl("[A-Za-z]+",word)) %>%
#   count(Package,word)

# x <- left_join(package_Imports_count,data_package) %>%
#   bind_tf_idf(word,Package,n) %>% 
#   filter(tf_idf>1)

# Imports: 2-D MDS layout ----
# Tokenise each package's Imports field, keep tokens that contain a letter,
# weight them with tf-idf and keep only terms scoring above 1. The surviving
# counts are cast to a document-feature matrix, turned into pairwise distances
# with textstat_dist() and projected onto two coordinates with cmdscale().
package_Imports <- data_package %>%
  unnest_tokens(word, Imports) %>%
  filter(grepl("[A-Za-z]+", word)) %>%
  count(Package, word) %>%
  bind_tf_idf(word, Package, n) %>%
  filter(tf_idf > 1) %>%
  cast_dfm(document = Package, term = word, value = n) %>%
  textstat_dist() %>%
  cmdscale() %>%
  as.data.frame() %>%
  # The row names carry the document (package) names; expose them as a column
  # so the coordinates can be joined back to the metadata.
  mutate(Package = row.names(.))

names(package_Imports) <- c("x", "y", "Package")

# Join on an explicit key: a natural join would silently pick up any other
# column name the two tables happen to share.
package_Imports <- left_join(package_Imports, data_package, by = "Package")

# Series label for the chart: packages whose Author field mentions RStudio.
is_RStudio <- ifelse(
  grepl("RStudio", package_Imports$Author),
  "RStudio",
  "others"
)

print(nrow(package_Imports))
## [1] 7223
hchart(
  package_Imports,
  "scatter",
  hcaes(x = x, y = y, group = is_RStudio, label = Package, value = Description)
) %>%
  hc_title(text = "Imports") %>%
  hc_chart(zoomType = "xy") %>%
  hc_tooltip(formatter = JS("function () {
                                   return '<b>' + this.point.label + '</b><br/>'+'Description: '+this.point.value;}"))
## Warning: `parse_quosure()` is deprecated as of rlang 0.2.0.
## Please use `parse_quo()` instead.
## This warning is displayed once per session.
## Warning: `as_data_frame()` is deprecated as of tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# hchart(package_Imports,"scatter", hcaes(x=x,y=y,label = row.names(package_Imports))) %>%
#   hc_title(text ="Imports") %>%
#   hc_tooltip(formatter = JS("function () {
#                                    return '<b>' + this.point.label + '</b>';}"))
# Description: 2-D MDS layout ----
# Same pipeline as for the other fields, applied to the Description text:
# tokenise, keep tokens containing a letter, keep terms with tf-idf above 1,
# cast to a document-feature matrix, compute pairwise distances and project
# onto two coordinates with cmdscale().
package_Description <- data_package %>%
  unnest_tokens(word, Description) %>%
  filter(grepl("[A-Za-z]+", word)) %>%
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  count(Package, word, sort = TRUE) %>%
  bind_tf_idf(word, Package, n) %>%
  filter(tf_idf > 1) %>%
  cast_dfm(document = Package, term = word, value = n) %>%
  textstat_dist() %>%
  cmdscale() %>%
  as.data.frame() %>%
  # Expose the row names (package names) as a join key.
  mutate(Package = row.names(.))

names(package_Description) <- c("x", "y", "Package")

# Join on an explicit key: a natural join would silently pick up any other
# column name the two tables happen to share.
package_Description <- left_join(
  package_Description,
  data_package,
  by = "Package"
)

# Series label for the chart: packages whose Author field mentions RStudio.
is_RStudio <- ifelse(
  grepl("RStudio", package_Description$Author),
  "RStudio",
  "others"
)

print(nrow(package_Description))
## [1] 484
hchart(
  package_Description,
  "scatter",
  hcaes(x = x, y = y, group = is_RStudio, label = Package, value = Description)
) %>%
  hc_title(text = "Description") %>%
  hc_chart(zoomType = "xy") %>%
  hc_tooltip(formatter = JS("function () {
                                   return '<b>' + this.point.label + '</b><br/>'+'Description: '+this.point.value;}"))
# hchart(package_Description,"scatter", hcaes(x=x,y=y,group = is_RStudio,label = data_package$Package ,value = data_package$Description)) %>%
#   hc_title(text ="Description") %>%
#   hc_tooltip(formatter = JS("function () {
#                                    return '<b>' + this.point.label + '</b><br/>'+'num: '+this.point.value;}"))
# hchart(package_Description,"scatter", hcaes(x=x,y=y,label = row.names(package_Description))) %>%
#   hc_title(text ="Description") %>%
#   hc_tooltip(formatter = JS("function () {
#                                    return '<b>' + this.point.label + '</b>';}"))
# Title: 2-D MDS layout ----
# The package name and its Title are pasted into one text field, tokenised,
# restricted to tokens containing a letter and to terms with tf-idf above 1,
# cast to a document-feature matrix, converted to pairwise distances and
# projected onto two coordinates with cmdscale().
package_Title <- data_package %>%
  mutate(text = paste(Package, Title)) %>%
  unnest_tokens(word, text) %>%
  filter(grepl("[A-Za-z]+", word)) %>%
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  count(Package, word, sort = TRUE) %>%
  bind_tf_idf(word, Package, n) %>%
  filter(tf_idf > 1) %>%
  cast_dfm(document = Package, term = word, value = n) %>%
  textstat_dist() %>%
  cmdscale() %>%
  as.data.frame() %>%
  # Expose the row names (package names) as a join key.
  mutate(Package = row.names(.))

names(package_Title) <- c("x", "y", "Package")

# Join on an explicit key: a natural join would silently pick up any other
# column name the two tables happen to share.
package_Title <- left_join(package_Title, data_package, by = "Package")

# Series label for the chart: packages whose Author field mentions RStudio.
is_RStudio <- ifelse(
  grepl("RStudio", package_Title$Author),
  "RStudio",
  "others"
)

print(nrow(package_Title))
## [1] 12732
hchart(
  package_Title,
  "scatter",
  hcaes(x = x, y = y, group = is_RStudio, label = Package, value = Description)
) %>%
  hc_title(text = "Title") %>%
  hc_chart(zoomType = "xy") %>%
  hc_tooltip(formatter = JS("function () {
                                   return '<b>' + this.point.label + '</b><br/>'+'Description: '+this.point.value;}"))
# names(package_Title) <- c("x","y")


# hchart(package_Title,"scatter", hcaes(x=x,y=y,group = is_RStudio,label = data_package$Package ,value = data_package$Description)) %>%
#   hc_title(text ="Title") %>%
#   hc_tooltip(formatter = JS("function () {
#                                    return '<b>' + this.point.label + '</b><br/>'+'num: '+this.point.value;}"))
# hchart(package_Title,"scatter", hcaes(x=x,y=y,label = row.names(package_Title))) %>%
#   hc_title(text ="Title") %>%
#   hc_tooltip(formatter = JS("function () {
#                                    return '<b>' + this.point.label + '</b>';}"))
# Package + Title + Description: 2-D MDS layout ----
# The three text fields are pasted into one, tokenised, restricted to tokens
# containing a letter and to terms with tf-idf above 1, cast to a
# document-feature matrix, converted to pairwise distances and projected onto
# two coordinates with cmdscale().
package_all <- data_package %>%
  mutate(text = paste(Package, Title, Description)) %>%
  unnest_tokens(word, text) %>%
  filter(grepl("[A-Za-z]+", word)) %>%
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  count(Package, word, sort = TRUE) %>%
  bind_tf_idf(word, Package, n) %>%
  filter(tf_idf > 1) %>%
  cast_dfm(document = Package, term = word, value = n) %>%
  textstat_dist() %>%
  cmdscale() %>%
  as.data.frame() %>%
  # Expose the row names (package names) as a join key.
  mutate(Package = row.names(.))

names(package_all) <- c("x", "y", "Package")

# Join on an explicit key: a natural join would silently pick up any other
# column name the two tables happen to share.
package_all <- left_join(package_all, data_package, by = "Package")

# Series label for the chart: packages whose Author field mentions RStudio.
is_RStudio <- ifelse(
  grepl("RStudio", package_all$Author),
  "RStudio",
  "others"
)

print(nrow(package_all))
## [1] 732
hchart(
  package_all,
  "scatter",
  hcaes(x = x, y = y, group = is_RStudio, label = Package, value = Description)
) %>%
  hc_title(text = "all") %>%
  hc_chart(zoomType = "xy") %>%
  hc_tooltip(formatter = JS("function () {
                                   return '<b>' + this.point.label + '</b><br/>'+'Description: '+this.point.value;}"))
# names(package_Title) <- c("x","y")


# hchart(package_Title,"scatter", hcaes(x=x,y=y,group = is_RStudio,label = data_package$Package ,value = data_package$Description)) %>%
#   hc_title(text ="Title") %>%
#   hc_tooltip(formatter = JS("function () {
#                                    return '<b>' + this.point.label + '</b><br/>'+'num: '+this.point.value;}"))
# hchart(package_Title,"scatter", hcaes(x=x,y=y,label = row.names(package_Title))) %>%
#   hc_title(text ="Title") %>%
#   hc_tooltip(formatter = JS("function () {
#                                    return '<b>' + this.point.label + '</b>';}"))
# Imports: t-SNE layout ----
# Build the tf-idf-filtered term-count matrix for the Imports field and drop
# duplicated rows with unique().
# NOTE(review): the de-duplication is presumably there because Rtsne() checks
# for duplicate rows by default -- confirm against the Rtsne documentation.
package_Imports_uni <- data_package %>%
  unnest_tokens(word, Imports) %>%
  filter(grepl("[A-Za-z]+", word)) %>%
  count(Package, word) %>%
  bind_tf_idf(word, Package, n) %>%
  filter(tf_idf > 1) %>%
  cast_dfm(document = Package, term = word, value = n) %>%
  as.matrix() %>%
  unique()

# Embed with Rtsne() and keep only its Y component (the coordinates). This
# assignment reuses the name package_Imports and replaces any earlier value.
# NOTE(review): no seed is set here, so the layout presumably differs between
# runs; call set.seed() beforehand if a reproducible figure is needed.
package_Imports <- package_Imports_uni %>%
  Rtsne() %$%
  Y %>%
  as.data.frame() %>%
  # Package names come from the de-duplicated input matrix.
  mutate(Package = row.names(package_Imports_uni))

names(package_Imports) <- c("x", "y", "Package")

# Join on an explicit key: a natural join would silently pick up any other
# column name the two tables happen to share.
package_Imports <- left_join(package_Imports, data_package, by = "Package")

# Series label for the chart: packages whose Author field mentions RStudio.
is_RStudio <- ifelse(
  grepl("RStudio", package_Imports$Author),
  "RStudio",
  "others"
)

print(nrow(package_Imports))
## [1] 4010
hchart(
  package_Imports,
  "scatter",
  hcaes(x = x, y = y, group = is_RStudio, label = Package, value = Description)
) %>%
  hc_title(text = "Imports") %>%
  hc_chart(zoomType = "xy") %>%
  hc_tooltip(formatter = JS("function () {
                                   return '<b>' + this.point.label + '</b><br/>'+'Description: '+this.point.value;}"))
# Description: t-SNE layout ----
# Build the tf-idf-filtered term-count matrix for the Description field and
# drop duplicated rows with unique().
# NOTE(review): the de-duplication is presumably there because Rtsne() checks
# for duplicate rows by default -- confirm against the Rtsne documentation.
package_Description_uni <- data_package %>%
  unnest_tokens(word, Description) %>%
  filter(grepl("[A-Za-z]+", word)) %>%
  count(Package, word) %>%
  bind_tf_idf(word, Package, n) %>%
  filter(tf_idf > 1) %>%
  cast_dfm(document = Package, term = word, value = n) %>%
  as.matrix() %>%
  unique()

# Embed with Rtsne() and keep only its Y component (the coordinates). This
# assignment reuses the name package_Description and replaces any earlier
# value.
# NOTE(review): no seed is set here, so the layout presumably differs between
# runs; call set.seed() beforehand if a reproducible figure is needed.
package_Description <- package_Description_uni %>%
  Rtsne() %$%
  Y %>%
  as.data.frame() %>%
  # Package names come from the de-duplicated input matrix.
  mutate(Package = row.names(package_Description_uni))

names(package_Description) <- c("x", "y", "Package")

# Join on an explicit key: a natural join would silently pick up any other
# column name the two tables happen to share.
package_Description <- left_join(
  package_Description,
  data_package,
  by = "Package"
)

# Series label for the chart: packages whose Author field mentions RStudio.
is_RStudio <- ifelse(
  grepl("RStudio", package_Description$Author),
  "RStudio",
  "others"
)

print(nrow(package_Description))
## [1] 483
hchart(
  package_Description,
  "scatter",
  hcaes(x = x, y = y, group = is_RStudio, label = Package, value = Description)
) %>%
  hc_title(text = "Description") %>%
  hc_chart(zoomType = "xy") %>%
  hc_tooltip(formatter = JS("function () {
                                   return '<b>' + this.point.label + '</b><br/>'+'Description: '+this.point.value;}"))
# Title: t-SNE layout ----
# Paste the package name and Title into one text field, build the
# tf-idf-filtered term-count matrix and drop duplicated rows with unique().
# NOTE(review): the de-duplication is presumably there because Rtsne() checks
# for duplicate rows by default -- confirm against the Rtsne documentation.
package_Title_uni <- data_package %>%
  mutate(text = paste(Package, Title)) %>%
  unnest_tokens(word, text) %>%
  filter(grepl("[A-Za-z]+", word)) %>%
  count(Package, word) %>%
  bind_tf_idf(word, Package, n) %>%
  filter(tf_idf > 1) %>%
  cast_dfm(document = Package, term = word, value = n) %>%
  as.matrix() %>%
  unique()

# Pass the matrix through normalize_input(), embed with Rtsne() and keep only
# its Y component (the coordinates). This assignment reuses the name
# package_Title and replaces any earlier value.
# NOTE(review): no seed is set here, so the layout presumably differs between
# runs; call set.seed() beforehand if a reproducible figure is needed.
package_Title <- package_Title_uni %>%
  normalize_input() %>%
  Rtsne() %$%
  Y %>%
  as.data.frame() %>%
  # Package names come from the de-duplicated (pre-normalisation) matrix.
  mutate(Package = row.names(package_Title_uni))

names(package_Title) <- c("x", "y", "Package")

# Join on an explicit key: a natural join would silently pick up any other
# column name the two tables happen to share.
package_Title <- left_join(package_Title, data_package, by = "Package")

# Series label for the chart: packages whose Author field mentions RStudio.
is_RStudio <- ifelse(
  grepl("RStudio", package_Title$Author),
  "RStudio",
  "others"
)

print(nrow(package_Title))
## [1] 12718
hchart(
  package_Title,
  "scatter",
  hcaes(x = x, y = y, group = is_RStudio, label = Package, value = Description)
) %>%
  hc_title(text = "Title") %>%
  hc_chart(zoomType = "xy") %>%
  hc_tooltip(formatter = JS("function () {
                                   return '<b>' + this.point.label + '</b><br/>'+'Description: '+this.point.value;}"))
# Package + Title + Description: t-SNE layout ----
# Paste the three text fields into one, build the tf-idf-filtered term-count
# matrix and drop duplicated rows with unique().
# NOTE(review): the de-duplication is presumably there because Rtsne() checks
# for duplicate rows by default -- confirm against the Rtsne documentation.
package_all_uni <- data_package %>%
  mutate(text = paste(Package, Title, Description)) %>%
  unnest_tokens(word, text) %>%
  filter(grepl("[A-Za-z]+", word)) %>%
  count(Package, word) %>%
  bind_tf_idf(word, Package, n) %>%
  filter(tf_idf > 1) %>%
  cast_dfm(document = Package, term = word, value = n) %>%
  as.matrix() %>%
  unique()

# Pass the matrix through normalize_input(), embed with Rtsne() and keep only
# its Y component (the coordinates). This assignment reuses the name
# package_all and replaces any earlier value.
# NOTE(review): no seed is set here, so the layout presumably differs between
# runs; call set.seed() beforehand if a reproducible figure is needed.
package_all <- package_all_uni %>%
  normalize_input() %>%
  Rtsne() %$%
  Y %>%
  as.data.frame() %>%
  # Package names come from the de-duplicated (pre-normalisation) matrix.
  mutate(Package = row.names(package_all_uni))

names(package_all) <- c("x", "y", "Package")

# Join on an explicit key: a natural join would silently pick up any other
# column name the two tables happen to share.
package_all <- left_join(package_all, data_package, by = "Package")

# Series label for the chart: packages whose Author field mentions RStudio.
is_RStudio <- ifelse(
  grepl("RStudio", package_all$Author),
  "RStudio",
  "others"
)

print(nrow(package_all))
## [1] 719
hchart(
  package_all,
  "scatter",
  hcaes(x = x, y = y, group = is_RStudio, label = Package, value = Description)
) %>%
  hc_title(text = "all") %>%
  hc_chart(zoomType = "xy") %>%
  hc_tooltip(formatter = JS("function () {
                                   return '<b>' + this.point.label + '</b><br/>'+'Description: '+this.point.value;}"))